* MEDIATION ANALYSIS
*********************
*********************


* ---- Session housekeeping ----
capture log close
clear all
set more off
set maxvar 10000


* ---- Machine-specific project root ----
* ROOT and ROOT1 are only defined when the script is run by the Windows user
* "alison_a". NOTE(review): for any other user ROOT stays empty and the paths
* built from it below will not resolve - confirm whether other users need a branch.
if "`c(username)'" == "alison_a" {
	global ROOT1 "C:\Users\alison_a\Dropbox\HI_shared (1)\"
	global ROOT "C:\Users\alison_a\Dropbox\HI_shared (1)\"
}


* ---- Project folders ----
* Constructed data (hard-coded absolute path, not derived from ROOT)
global data_constructed "C:\Users\alison_a\Dropbox\HI_shared (1)\endline report\FINAL\DATA"

* Folders derived from ROOT
global do "$ROOT/do-files"
global output "$ROOT/endline report/draft 2"
global rw "$ROOT/data/RW/T-value distributions"


* ---- Load the helper programs used for drawing tables ----
do "$do/stata programs for drawing tables/programs for endline tables, standardisations etc"


* LOAD CHILD-LEVEL DATA
************************
use "$data_constructed\child_analysis_data"
rename mac_say_and_und_0* macarthur_0*


* RE-ASSIGN IDs FOR CHILDREN WHOSE HOUSEHOLD CHANGED
*****************************************************
	* (From Diana's do-files) Two children changed household, so their IDs
	* changed; map the new ID back onto the original one.
	replace ll_nino = 430002000021 if ll_nino == 430012000021
	replace ll_nino = 960000600007 if ll_nino == 960010600007

	* Condition selecting the two re-assigned children
	local moved_kids "(ll_nino==430002000021 | ll_nino==960000600007)"

	* The re-assignment leaves these children with duplicate rows. For each
	* variable below, overwrite the value on every row of those two children
	* with the minimum observed across that child's rows.
	local dup_vars rutinas_NP rutinas_P1 rutinas_P2 educmaestra_0 expmaestra_0 depresion_0 jobsatisfaction_0 total_burnout_0
	sort ll_nino
	foreach v of local dup_vars {
		generate `v'temp = `v'
		bysort ll_nino: egen `v'_2 = min(`v'temp)
		replace `v' = `v'_2 if `moved_kids'
		drop `v'_2 `v'temp
	}

	* Then drop the duplicate row that has no ll_maest value
	drop if ll_maest == . & `moved_kids'
	

* MERGE IN LINKING FILE
************************
	* Variables of the linking file that receive an "LF" prefix before the merge
	local lf_vars BLFU test_ASQcog_BL test_ASQsoc_BL test_macarthur_BL child_teacher_BL ecers_BL iters_BL socpro_FU nutr_FU ///
		test_tvip_FU test_daberon_FU test_ptt_FU test_wm5_FU test_wm12_FU test_wm14_FU test_wm17_FU test_wm21_FU attendFU_m0 idclass_FU ///
		child_teacher_FU ecers_FU iters_FU grupo itt_1 itt_2 itt_3 ll_maest_BL ll_hf ll_maest ll_inst

	* Build a prefixed copy of the linking file without disturbing the child data in memory
	preserve
		use "$data_constructed/Linking information.dta", clear
		foreach v of local lf_vars {
			rename `v' LF`v'
		}
		tempfile linking_data
		save `linking_data'
	restore

	* One-to-one merge on child ID; assert() stops the script unless every
	* observation is matched
	merge 1:1 ll_nino using `linking_data', assert(matched)
	drop _merge

	* The child file's own ll_maest is renamed with a _BL suffix
	rename ll_maest ll_maest_BL
	
	
* MERGE IN ECERS/ITERS
***********************
* Classroom-level scores from wave 1: itec* variables get the suffix _1
preserve
	use "$data_constructed/ECERS_ITERS", clear
	keep if wave == 1
	keep ll_* itec*
	rename itec* itec*_1
	tempfile itec_fu
	save `itec_fu'
restore

* Classroom-level scores from wave 0: itec* variables get the suffix _0 and
* the classroom ID is renamed to ll_aulaclass_BL so it can serve as merge key
preserve
	use "$data_constructed/ECERS_ITERS", clear
	keep if wave == 0
	rename ll_aulaclass ll_aulaclass_BL
	keep ll_* itec*
	rename itec* itec*_0
	tempfile itec_bl
	save `itec_bl'
restore

* Attach wave-0 scores, then wave-1 scores; classrooms that appear only in
* the ECERS/ITERS file (_merge==2) are discarded
merge m:1 ll_aulaclass_BL using `itec_bl'
drop if _merge == 2
drop _merge

merge m:1 ll_aulaclass using `itec_fu'
drop if _merge == 2
drop _merge


* AND CENTRE LEVEL AVERAGES
***************************
* For each wave, average every itec* score within centre (ll_inst) and
* suffix the result with _<wave>_centre
foreach w in 1 0 {
	preserve
		use "$data_constructed/ECERS_ITERS", clear
		keep if wave == `w'
		keep ll_* itec*
		collapse itec*, by(ll_inst)
		rename itec* itec*_`w'_centre
		tempfile itec_centre_`w'
		save `itec_centre_`w''
	restore
}

* Attach the centre averages, wave 0 first and then wave 1; centres found
* only in the ECERS/ITERS file are discarded
foreach w in 0 1 {
	merge m:1 ll_inst using `itec_centre_`w''
	drop if _merge == 2
	drop _merge
}


* CREATE CHILD DEVELOPMENT FACTORS
*************************************
* Outlier rule: drop any child with a z-score below -3 on one of the
* cognitive tests listed here (tests 5 and 21 are not part of the rule - we will drop these)

	* Test list CHANGED TO IRT -- 11.12.15
	local cog_tests IRT_tvip_as_z wm12_as_z IRT_wm14_as_z IRT_wm17_as_z ptt_as_z IRT_dab_as_z
	foreach test of local cog_tests {
		display "`test'"
		drop if `test' < -3
		* high scores are only counted, not dropped
		count if `test' > 3 & `test' != .
	}

	* Indicator dummies for each category of dane and of cc_tester
	tabulate dane, generate(dane_)
	tabulate cc_tester, generate(cc_tester_)

	* Control-variable globals
	global bl_tests "asq_com_0_as_z asq_mg_0_as_z asq_ps_0_as_z asq_soc_0_as_z asq_fm_0_as_z macarthur_0_as_z" // could change to as !!!!!!!!!!!!!!!!!!!!!!!
	global bl_tests_temp "asq_com_0_ats_z asq_mg_0_ats_z asq_ps_0_ats_z asq_soc_0_ats_z asq_fm_0_ats_z macarthur_0_ats_z" // could change to as !!!!!!!!!!!!!!!!!!!!!!!
	global controls_child "age_months_ex*t male dane_*"
	global controls_child_tests "male dane_*"
	
	* ALL TESTS
	********************
	* Variable labels for the individual tests
	*label var wm5_as_z "Woodcock Munoz - 5"
	label variable IRT_tvip_as_z "TVIP"
	label variable IRT_dab_as_z "Daberon - total"
	label variable ptt_as_z "Pencil Tapping Test"
	label variable wm12_as_z "Woodcock Munoz - 12"
	label variable IRT_wm14_as_z "Woodcock Munoz - 14"
	label variable IRT_wm17_as_z "Woodcock Munoz - 17"
	label variable IRT_wm21_as_z "Woodcock Munoz - 21"

	* Every factor score below is standardised against the observations
	* with itt2==0 and itt3==0 (mean and SD taken from that group)
	local ref_group "itt2==0 & itt3==0"


	* COG FACTOR 1 - all six tests, including PTT
	factor IRT_tvip_as_z IRT_dab_as_z ptt_as_z wm12_as_z IRT_wm14_as_z IRT_wm17_as_z, mineigen(1) blanks(0.25)
	predict factor_cog1_as_fullirt
	label variable factor_cog1_as_fullirt "Cog - inc PTT - full irt - alt controls"
	summarize factor_cog1_as_fullirt if `ref_group'
	replace factor_cog1_as_fullirt = (factor_cog1_as_fullirt - r(mean)) / r(sd)


	* COG FACTOR 2 - same tests without PTT
	factor IRT_tvip_as_z IRT_dab_as_z wm12_as_z IRT_wm14_as_z IRT_wm17_as_z, mineigen(1) blanks(0.25)
	predict factor_cog2_as_fullirt
	label variable factor_cog2_as_fullirt "Cog, Lang and Sch (exc. PTT, full sample)"
	summarize factor_cog2_as_fullirt if `ref_group'
	replace factor_cog2_as_fullirt = (factor_cog2_as_fullirt - r(mean)) / r(sd)


	* COG FACTOR 3 - factor 2 restricted to observations where factor 1 is
	* non-missing, then re-standardised on that limited sample
	generate factor_cog3_as_fullirt = factor_cog2_as_fullirt if factor_cog1_as_fullirt != .
	label variable factor_cog3_as_fullirt "Cog, Lang and Sch (exc. PTT, limited sample)"
	summarize factor_cog3_as_fullirt if `ref_group'
	replace factor_cog3_as_fullirt = (factor_cog3_as_fullirt - r(mean)) / r(sd)


	* PRELIT - TVIP plus Woodcock Munoz 14 and 17
	factor IRT_tvip_as_z IRT_wm14_as_z IRT_wm17_as_z, mineigen(1) blanks(0.25)
	predict factor_prelit_as_fullirt
	label variable factor_prelit_as_fullirt "Pre-literacy skills"
	summarize factor_prelit_as_fullirt if `ref_group'
	replace factor_prelit_as_fullirt = (factor_prelit_as_fullirt - r(mean)) / r(sd)
	
	
* Baseline test controls: flag missing values with a dummy (miss_<var>) and
* then fill them in with the mean of the variable

foreach ctrl of varlist $bl_tests {
	generate miss_`ctrl' = (`ctrl' == .)
	summarize `ctrl'
	replace `ctrl' = r(mean) if `ctrl' == .
}

* itec_centre: 1 only when every wave-1 centre-level ITEC factor is observed
generate itec_centre = 1
foreach score of varlist itec_f*_z_1_centre {
	replace itec_centre = 0 if `score' == .
}

* itec_class: 1 when at least one wave-1 class-level ITEC factor is observed
generate itec_class = 0
foreach score of varlist itec_f*_z_1 {
	replace itec_class = 1 if `score' != .
}
	

	
* MEDIATION ANALYSIS
***********************

* CLASS LEVEL ITEC - FU
* For children with at least one class-level ITEC factor observed
* (itec_class==1): flag each missing factor with a dummy (itec_miss_<var>)
* and fill it in with the mean of that factor over all observations.
foreach x of varlist itec_f*_z_1   {
	gen itec_miss_`x'=(`x'==.) if  itec_class==1
	sum `x'
	replace `x'=r(mean) if `x'==. & itec_class==1
	}

* TE REGRESSION - WHOLE SAMPLE (shown in the log only, not exported)
reg factor_cog1_as_fullirt itt2 itt3 cc_tester_* miss_* $bl_tests  $controls_child_tests, cl(ll_inst)


* TE reg for the subsample with class-level ITEC data
* NOTE(review): later outreg2 calls in this file write to the same
* "$output/mediation" target with the replace option, so this table does not
* survive a full run - confirm whether a separate file name is wanted.
reg factor_cog1_as_fullirt itt2 itt3 cc_tester_* miss_* $bl_tests  $controls_child_tests if itec_class==1 , cl(ll_inst)
outreg2 using "$output/mediation",  excel  keep(itt2 itt3 itec*) replace

* add in class level ecers and iters (plus their missing-value dummies)
reg factor_cog1_as_fullirt itt2 itt3 cc_tester_* miss_* itec_f*_z_1 itec_miss_* $bl_tests  $controls_child_tests if itec_class==1 , cl(ll_inst)
* keep() lists the class-level factors that are in this regression; the
* seventh entry previously named the centre-level variable, which is not a
* regressor here, and the first factor was listed twice
outreg2 using "$output/mediation", excel keep(itt2 itt3 itec_f1_z_1 itec_f2_z_1 itec_f3_z_1 itec_f4_z_1 itec_f5_z_1 itec_f6_z_1 itec_f7_z_1)






* CENTRE LEVEL ITEC - FU

* TE REGRESSION - WHOLE SAMPLE (shown in the log only, not exported)
reg factor_cog1_as_fullirt itt2 itt3 cc_tester_* miss_* $bl_tests  $controls_child_tests, cl(ll_inst)

* TE reg for the subsample with all centre-level ITEC factors observed
* (itec_centre==1). The replace option starts the "$output/mediation" table
* afresh, discarding anything written to it earlier.
reg factor_cog1_as_fullirt itt2 itt3 cc_tester_* miss_* $bl_tests  $controls_child_tests if itec_centre==1 , cl(ll_inst)
outreg2 using "$output/mediation",  excel  keep(itt2 itt3 itec*) replace

* add in centre level ecers and iters
reg factor_cog1_as_fullirt itt2 itt3 cc_tester_* miss_* itec_f*_z_1_centre $bl_tests  $controls_child_tests if itec_centre==1 , cl(ll_inst)
* keep() lists each centre-level factor once (the first factor was previously listed twice)
outreg2 using "$output/mediation", excel keep(itt2 itt3 itec_f1_z_1_centre itec_f2_z_1_centre itec_f3_z_1_centre itec_f4_z_1_centre itec_f5_z_1_centre itec_f6_z_1_centre itec_f7_z_1_centre)


*****************************************************
* MEDIATION ANALYSIS BOOTSTRAP
*
* For every factor_* outcome:
*   1. estimate the itt3 coefficient without and with the centre-level ITEC
*      factors and take the difference (unmediated minus mediated);
*   2. recompute that difference on B cluster-bootstrap samples (clusters =
*      ll_inst) and store the draws in the first B observations of diff;
*   3. centre the draws on zero and report the share that exceeds the observed
*      difference as a one-tailed p-value;
*   4. export both regressions, with the p-value attached, to "$output/mediation".
drop itec_f1_z_1_centre itec_f6_z_1_centre itec_f7_z_1_centre  // not relevant for mediation hypothesis

* Number of bootstrap replications. The draws are written into observations
* 1 to B of diff, so the estimation sample must hold at least B observations.
local B=1000

	cap drop __*

* The first outreg2 call of the loop overwrites the output file; the macro is
* emptied at the end of the first pass so that later outcomes are appended.
local replace replace

gen diff=.

keep  if itec_centre==1

* Fix the random-number seed so the bootstrap p-values are reproducible
* (the seed value itself is arbitrary).
set seed 20151211

* Temporary scalars for the bootstrap coefficients; fixed names avoid building
* scalar names from the outcome name, which can reach the 32-character limit.
tempname b_unmed b_med

foreach y of varlist  factor_*   {

		* Observed coefficients on the full (centre-level) sample
		reg `y' itt2 itt3 cc_tester_* miss_* $bl_tests  $controls_child_tests if itec_centre==1 , cl(ll_inst)
		local unmed =_b[itt3]

		reg `y' itt2 itt3 cc_tester_* miss_* itec_f*_z_1_centre $bl_tests  $controls_child_tests if itec_centre==1 , cl(ll_inst)
		local med =_b[itt3]

		local ourdiff=`unmed'-`med'

	forvalues i=1/`B' {
		preserve
		bsample, cluster(ll_inst)
		* The dependent variable is the current outcome `y'. (It was previously
		* an undefined macro, which made itt2 the dependent variable.)
		qui reg `y' itt2 itt3 miss_* cc_tester_* $bl_tests  dane_* male
		scalar `b_unmed' =_b[itt3]
		qui reg `y' itt2 itt3 itec_f*_z_1_centre miss_* cc_tester_* $bl_tests  dane_* male
		scalar `b_med' =_b[itt3]
		restore
		di "`y' --- `i'"
		* scalars survive restore; store this replication's difference in row i
		qui replace diff=`b_unmed'-`b_med' in `i'
		}

	* Smaller than first spec (one tailed)
	sum diff
	replace diff=diff-r(mean) // center
	di `ourdiff'
	count if diff>`ourdiff' & diff~=.
	local nnn=r(N)
	count if diff~=.

	* p-value = share of centred bootstrap draws above the observed difference
	local p= `nnn'/r(N)
	di "`p'"

	* TE reg for this subsample (re-run so outreg2 picks up these estimates)
	reg `y' itt2 itt3 cc_tester_* miss_* $bl_tests  $controls_child_tests if itec_centre==1 , cl(ll_inst)
	outreg2 using "$output/mediation",  excel  keep(itt2 itt3 itec*) `replace'

	* add in centre level ecers and iters
	reg `y' itt2 itt3 cc_tester_* miss_* itec_f*_z_1_centre $bl_tests  $controls_child_tests if itec_centre==1 , cl(ll_inst)
	outreg2 using "$output/mediation", excel keep(itt2 itt3   itec_f2_z_1_centre itec_f3_z_1_centre itec_f4_z_1_centre itec_f5_z_1_centre   ) addstat( P-value, `p')
	local replace
	}
	


	

